library(foreign)
library(effects)
library(nnet)
library(MASS)
library(dplyr)
library(tidyverse)
library(plyr) 
library(haven)
library(simcf)
library(ggplot2)
options(scipen = 20)

## sink (file = "analysis_out.txt", append = FALSE)


## attach the vbf object unless it is already attached
if (!length(grep("vbf_output", search()))) {
  cat ("Attaching vbf... this may take a few moments...")
  attach("vbf_output")
}

########################################################
################# MATCHING ANALYSIS ####################
########################################################

## filter latinos out of vbf
vbf <- filter(vbf, ethnicity == "latino_not_yes")

## filter inactive voters out of vbf
vbf <- filter(vbf, voter_status_desc_2012 == "ACTIVE" & voter_status_desc_2016 == "ACTIVE")

## Read all border pairings
borders2use <- read_delim (file = "county-early-voting-differences.csv",
                           delim = "\t",
                           col_names = TRUE)
cat ("Read ", nrow(borders2use), " county borders in North Carolina.\n", sep = "")

## Create vector that describes the five ways that county borders can be ranked
measures2use <-  c("totalhoursDID", "sitesDID", "evehoursDID", "sathoursDID", "sunhoursDID")

## set up variables that facilitate different tests
races2use <- c("black", "white") 
parties2use <- c("dem", "rep")
votetypes2use <- c("abs2012", "ed2012", "eip2012", "novote2012")

## print header line in test output file.  This erases
## tests_output.txt, if this file already exists.  Note as well that
## the order of the columns is very important and must match the
## output of test.

cat("border_measure", "\t",             # The type of measure
    "border_measure_value", "\t",       # Its value
    "borderrank", "\t",                 # Its rank among all borders
    "test_type", "\t",                  # 
    "race", "\t",                       # 
    "party", "\t",                      #
    "county1", "\t",                    # name of county1
    "county2", "\t",                    # and county2
    "value1", "\t",      # EIP statistic used to rank borders, county1
    "value2", "\t",      # and county2
    "proportion1", "\t",  # estimated percentage for county1
    "proportion2", "\t",  # and county2
    "test_statistic", "\t", # test statistic for difference in proportions test
    "p_value",              # p-value for test
    "\n",                   # print line feed after header line
    file = "tests_output.txt",
    sep = "")

## Loop over measure for ranking county borders
for (measure2use in measures2use) {
  
  cat ("\n**** Border measure: ", measure2use, "\n\n", sep = "")
  
  ## order borders2use depending on statistic measure2use
  if (measure2use == "totalhoursDID") {
    borders2use <- borders2use[order(as.double(borders2use$totalhoursDID), decreasing = TRUE),]
  }
  else if (measure2use == "sitesDID") {
    borders2use <- borders2use[order(as.double(borders2use$sitesDID), decreasing = TRUE),]
  }
  else if (measure2use == "evehoursDID") {
    borders2use <- borders2use[order(as.double(borders2use$evehoursDID), decreasing = TRUE),]
  }
  else if (measure2use == "sathoursDID") {
    borders2use <- borders2use[order(as.double(borders2use$sathoursDID), decreasing = TRUE),]
  }
  else if (measure2use == "sunhoursDID") {
    borders2use <- borders2use[order(as.double(borders2use$sunhoursDID), decreasing = TRUE),]
  }
  else {
    stop ("Error in measure2use.  Do not know how to sort county borders.\n")
  }
  
  ## Loop over county borders (control/treatment pairs of counties)
  for (borderrank in 1:nrow(borders2use)) {
    ## treatment county: county1
    ## control county: county2
    ## Note that considering counties 1 and 2 treatment and control, respectively, is WLOG
    cat ("Working on ranked border ", borderrank, " of ", nrow(borders2use), ".\n", sep = "")
    cat ("\tCounty 1: ", borders2use$county1[borderrank], "\n\tCounty 2: ", borders2use$county2[borderrank], "\n", sep = "")
    
    if (measure2use == "totalhoursDID") {
      value1 <- borders2use$totalhoursChg1[borderrank]
      value2 <- borders2use$totalhoursChg2[borderrank]
    }
    else if (measure2use == "sitesDID") {
      value1 <- borders2use$sitesChg1[borderrank]
      value2 <- borders2use$sitesChg2[borderrank]
    }
    else if (measure2use == "evehoursDID") {
      value1 <- borders2use$evehoursChg1[borderrank]
      value2 <- borders2use$evehoursChg2[borderrank]
    }
    else if (measure2use == "sathoursDID") {
      value1 <- borders2use$sathoursChg1[borderrank]
      value2 <- borders2use$sathoursChg2[borderrank]
    }
    else if (measure2use == "sunhoursDID") {
      value1 <- borders2use$sunhoursChg1[borderrank]
      value2 <- borders2use$sunhoursChg2[borderrank]
    }
    else {
      stop ("Error in measure2use.  Cannot assign treatment and control values (value1 and value2).\n")
    }
    
    ## Loop over registered voter race
    for (race2use in races2use) {
      ## Loop over registered voter party
      for (party2use in parties2use) {
        cat ("------\nRace: ", race2use, "\nParty: ", party2use, "\n", sep = "")
        
        ## make query text
        text_treatment <- paste("COUNTY_NAM == \"", borders2use$county1[borderrank], "\" & borders_", borders2use$county2[borderrank]," == TRUE & race == \"", race2use, "\" & party == \"", party2use, "\"", sep = "")
        text_control <- paste("COUNTY_NAM == \"", borders2use$county2[borderrank], "\" & borders_", borders2use$county1[borderrank]," == TRUE & race == \"", race2use, "\" & party == \"", party2use, "\"", sep = "")
        
        ## execute queries with filter_
        vbf_treatment <- vbf %>% filter_(text_treatment)
        vbf_control <- vbf %>% filter_(text_control)
        
        ##  Initialize 4x4 results matrices
        matrix_treatment =  matrix_control = matrix (nrow = 0, ncol = 4)
        
        ## Fill in 4x4 results matrices, one for treatment county
        ## (county 1) and one for control county (county 2).
        ##
        ## The way that this works is by looping over 2012 outcomes in
        ## the order abs2012, ed2012, eip2012, novote2012.  Note: this
        ## order is very important.  It must match the 2016 order in
        ## the code below.          
        for (votetype2use in votetypes2use) {
          filter2use <- paste("outcome2012 == \"", votetype2use, "\"", sep = "")
          
          row2use_treatment = vbf_treatment %>% filter_(filter2use) %>% summarise (abs2016 = sum(outcome2016 == "abs2016"),
                                                                                   ed2016 = sum(outcome2016 == "ed2016"), 
                                                                                   eip2016 = sum(outcome2016 == "eip2016"), 
                                                                                   novote2016 = sum(outcome2016 == "novote2016"))
          matrix_treatment <- rbind (matrix_treatment, row2use_treatment)
          
          ## row2use_control = vbf_control %>% filter_(filter2use) %>% summarise (abs2012 = sum(outcome2016 == "abs2016"),
          ##                                                                      ed2012 = sum(outcome2016 == "ed2016"), 
          ##                                                                      eip2016 = sum(outcome2016 == "eip2016"), 
          ##                                                                      novote2016 = sum(outcome2016 == "novote2016"))

          row2use_control = vbf_control %>% filter_(filter2use) %>% summarise (abs2016 = sum(outcome2016 == "abs2016"),
                                                                               ed2016 = sum(outcome2016 == "ed2016"), 
                                                                               eip2016 = sum(outcome2016 == "eip2016"), 
                                                                               novote2016 = sum(outcome2016 == "novote2016"))
          matrix_control <- rbind (matrix_control, row2use_control)
        }
        ## put names on the treatment and control matrices
        row.names (matrix_treatment) = votetypes2use
        row.names (matrix_control) = votetypes2use
        
        ## Done creating 4x4 matrices.  Now aggregate to get
        ## corresponding 2x2 matrices that are only vote / no-vote.
        
        matrix_control_small = matrix (nrow = 2, ncol = 2, byrow = TRUE,
                                       c(matrix_control[match("novote2012", votetypes2use),
                                                        match("novote2016", colnames(matrix_control))],
                                         sum(matrix_control[match("novote2012", votetypes2use), -match("novote2016", colnames(matrix_control))]),
                                         sum(matrix_control[-match("novote2012", votetypes2use), match("novote2016", colnames(matrix_control))]),
                                         sum(matrix_control[-match("novote2012", votetypes2use), -match("novote2016", colnames(matrix_control))])))
        row.names (matrix_control_small) <- c("novote2012", "vote2012")                                 
        colnames (matrix_control_small) <- c("novote2016", "vote2016")      
        
        matrix_treatment_small = matrix (nrow = 2, ncol = 2, byrow = TRUE,
                                         c(matrix_treatment[match("novote2012", votetypes2use),
                                                            match("novote2016", colnames(matrix_treatment))],
                                           sum(matrix_treatment[match("novote2012", votetypes2use), -match("novote2016", colnames(matrix_treatment))]),
                                           sum(matrix_treatment[-match("novote2012", votetypes2use), match("novote2016", colnames(matrix_treatment))]),
                                           sum(matrix_treatment[-match("novote2012", votetypes2use), -match("novote2016", colnames(matrix_treatment))])))
        row.names (matrix_treatment_small) <- c("novote2012", "vote2012")                                 
        colnames (matrix_treatment_small) <- c("novote2016", "vote2016")    
        
        ## Done creating 2x2 matrices.
        
        ## Test for changes in distribution of novote2012
        x <- c(matrix_control_small["novote2012","novote2016"],
               matrix_treatment_small["novote2012","novote2016"])
        
        n <- c(sum(matrix_control_small["novote2012",]),
               sum(matrix_treatment_small["novote2012",]))
        
        if (min(n) >= 20) {
          cat ("Changes in probability of novote2016 given novote2012.\n", sep = "")
          print(test_output <- prop.test(x = x, n = n))
          ## The order of the data lines printed below must match the
          ## order of the variables in the header line printed earlier!
          cat(measure2use, "\t",
              unlist(borders2use[borderrank, match(measure2use, names(borders2use))]), "\t",
              borderrank, "\t",
              "change_distr_novote2016_given_novote2012", "\t",
              race2use, "\t",
              party2use, "\t",
              borders2use$county1[borderrank], "\t",
              borders2use$county2[borderrank], "\t",
              value1, "\t",
              value2, "\t",
              test_output$estimate[1], "\t",
              test_output$estimate[2], "\t",
              test_output$statistic, "\t",
              test_output$p.value,
              "\n",
              file = "tests_output.txt",
              sep = "",
              append = TRUE)
        }
        else {
          cat ("No test for changes in distribution of novote2012 (n too small).\n", sep = "")
        }
        
        ## test for changes in distribution of vote2012
        x <- c(matrix_control_small["vote2012","vote2016"],
               matrix_treatment_small["vote2012","vote2016"])
        
        n <- c(sum(matrix_control_small["vote2012",]),
               sum(matrix_treatment_small["vote2012",]))
        
        if (min(n) >= 20) {
          cat ("Changes in distribution of vote2016 given vote2012.\n", sep = "")
          print(test_output <- prop.test(x = x, n = n))
          cat(measure2use, "\t",
              unlist(borders2use[borderrank, match(measure2use, names(borders2use))]), "\t",
              borderrank, "\t",
              "change_distr_vote2016_given_vote2012", "\t",
              race2use, "\t",
              party2use, "\t",
              borders2use$county1[borderrank], "\t",
              borders2use$county2[borderrank], "\t",
              value1, "\t",
              value2, "\t",
              test_output$estimate[1], "\t",
              test_output$estimate[2], "\t",
              test_output$statistic, "\t",
              test_output$p.value,
              "\n",
              file = "tests_output.txt",
              sep = "",
              append = TRUE)
        }
        else {
          cat ("No test for changes in distribution of vote2012 (n too small).\n", sep = "")
        }
      }
    }
  }
}


## sink()
